#setup
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 10000)
from bokeh.charts.operations import blend
from bokeh.charts import Bar, Donut, show
from bokeh.io import output_notebook
from bokeh.charts.attributes import CatAttr
from bokeh.models import HoverTool
import folium
output_notebook()
import IPython as ipython
from bokeh.resources import CDN
from bokeh.embed import file_html
# Display helpers used at the end of the notebook to hide/toggle code cells in the HTML export
import IPython.core.display as di
# Embed the report summary page.
ipython.display.IFrame("https://dl.dropboxusercontent.com/s/uxqp6gdwxapt0e8/RRIMAReportSummaryPage.html?dl=0",width=1000,height=800)

# Load the cleaned Izmir beneficiary workbook (merged header cells etc. were
# removed beforehand).
izmir = pd.read_excel('/Users/samlilienfeld/Google Drive/RRIMA/RRIMA Datasets_Forms/Datasets/Turkey Datasets/Clean datasets/CLEAN Beneficiary Database.31052016.xlsx')

# Load the cleaned v2 Greece workbook; only the 'Final' sheet is read.
# NOTE(review): `sheetname` is the older pandas keyword; newer pandas releases
# spell it `sheet_name` - confirm against the pinned pandas version.
combined_v2 = pd.read_excel('/Users/samlilienfeld/Google Drive/RRIMA/RRIMA Datasets_Forms/Datasets/Greece Datasets/Combined 220616 v2.xlsx',sheetname='Final')

# Family-member count columns, one per sex/age band.
mc_old_groups = ['MC_OLD_M>=60','MC_OLD_F>=60','MC_OLD_M>18','MC_OLD_F>18','MC_OLD_18>M>5','MC_OLD_18>F>5',
                 'MC_OLD_M_Child<5','MC_OLD_F_Child<5','MC_OLD_M>0<=1','MC_OLD_F>0<=1']

# Keep only the columns needed for time series counting and SADD analysis.
izmir_lim = izmir.loc[:, ['Gender','Age'] + mc_old_groups + ['Date to Delivery','Date to insert','Date of Entery']]

# Missing ages and missing counts become 0 so that every column can be cast
# to int.
izmir_lim['Age'] = izmir_lim['Age'].fillna(0).astype(int)
for group in mc_old_groups:
    izmir_lim[group] = izmir_lim[group].fillna(0).astype(int)
#code sex and age group of the beneficiary being surveyed - add to already disaggregated family members
#izmir_lim['MC_OLD_M>=60'] = np.where((izmir_lim.Gender == 'M') & (izmir_lim.Age >= 60),izmir_lim['MC_OLD_M>=60'] + 1, izmir_lim['MC_OLD_M>=60'])
#izmir_lim['MC_OLD_F>=60'] = np.where((izmir_lim.Gender == 'F') & (izmir_lim.Age >= 60),izmir_lim['MC_OLD_F>=60'] + 1, izmir_lim['MC_OLD_F>=60'])
#izmir_lim['MC_OLD_M>18'] = np.where((izmir_lim.Gender == 'M') & (izmir_lim.Age < 60) & (izmir_lim.Age > 18),izmir_lim['MC_OLD_M>18'] + 1, izmir_lim['MC_OLD_M>18'])
#izmir_lim['MC_OLD_F>18'] = np.where((izmir_lim.Gender == 'F') & (izmir_lim.Age < 60) & (izmir_lim.Age > 18),izmir_lim['MC_OLD_F>18'] + 1, izmir_lim['MC_OLD_F>18'])
#izmir_lim['MC_OLD_18>M>5'] = np.where((izmir_lim.Gender == 'M') & (izmir_lim.Age <= 18) & (izmir_lim.Age >= 5),izmir_lim['MC_OLD_18>M>5'] + 1, izmir_lim['MC_OLD_18>M>5'])
#izmir_lim['MC_OLD_18>F>5'] = np.where((izmir_lim.Gender == 'F') & (izmir_lim.Age <= 18) & (izmir_lim.Age >= 5),izmir_lim['MC_OLD_18>F>5'] + 1, izmir_lim['MC_OLD_18>F>5'])
#izmir_lim['MC_OLD_M_Child<5'] = np.where((izmir_lim.Gender == 'M') & (izmir_lim.Age < 5) & (izmir_lim.Age > 1),izmir_lim['MC_OLD_M_Child<5'] + 1, izmir_lim['MC_OLD_M_Child<5'])
#izmir_lim['MC_OLD_F_Child<5'] = np.where((izmir_lim.Gender == 'F') & (izmir_lim.Age < 5) & (izmir_lim.Age > 1),izmir_lim['MC_OLD_F_Child<5'] + 1, izmir_lim['MC_OLD_F_Child<5'])
#izmir_lim['MC_OLD_M>0<=1'] = np.where((izmir_lim.Gender == 'M') & (izmir_lim.Age <= 1),izmir_lim['MC_OLD_M>0<=1'] + 1, izmir_lim['MC_OLD_M>0<=1'])
#izmir_lim['MC_OLD_F>0<=1'] = np.where((izmir_lim.Gender == 'F') & (izmir_lim.Age <= 1),izmir_lim['MC_OLD_F>0<=1'] + 1, izmir_lim['MC_OLD_F>0<=1'])
#flag records where the beneficiary's sex or age is unknown (missing ages were filled with 0 earlier)
#izmir_lim['Sex or Age Unknown'] = np.where((izmir_lim.Age == 0) | (izmir_lim.Gender == ''),1,0)
# Drop everything except the sex/age count columns, reshape to long format
# (one row per record and count column), then total each count column.
izmir_trans = pd.melt(izmir_lim.drop(['Age','Gender','Date to insert','Date of Entery','Date to Delivery'],axis=1))
izmir_agg = (
    izmir_trans.groupby('variable')
    .aggregate({'value':'sum'})
    .reset_index()
    .rename(columns={'value':'Total_Beneficiaries'})
)

# Each count column encodes both a sex and an age band; spell both out once
# and derive the two lookup tables from this single mapping.
izmir_column_labels = {
    'MC_OLD_M>0<=1': ('Male', 'Under 5'),
    'MC_OLD_F>0<=1': ('Female', 'Under 5'),
    'MC_OLD_M_Child<5': ('Male', 'Under 5'),
    'MC_OLD_F_Child<5': ('Female', 'Under 5'),
    'MC_OLD_18>M>5': ('Male', '5 - 17'),
    'MC_OLD_18>F>5': ('Female', '5 - 17'),
    'MC_OLD_M>18': ('Male', '18 - 59'),
    'MC_OLD_F>18': ('Female', '18 - 59'),
    'MC_OLD_M>=60': ('Male', '60+'),
    'MC_OLD_F>=60': ('Female', '60+'),
}
genders = {column: sex for column, (sex, _) in izmir_column_labels.items()}
ages = {column: band for column, (_, band) in izmir_column_labels.items()}

# Direct dict indexing on purpose: an unexpected column name raises KeyError
# instead of silently producing a missing label.
izmir_agg['gender_text'] = [genders[column] for column in izmir_agg['variable']]
izmir_agg['age_group'] = [ages[column] for column in izmir_agg['variable']]

# Order rows youngest to oldest so the chart shows the age bands in sequence,
# with the gender split inside each band.
age_group_sort = {'Under 5':1,'5 - 17':2,'18 - 59':3,'60+':4}
izmir_agg['srt'] = [age_group_sort[band] for band in izmir_agg['age_group']]
izmir_agg = izmir_agg.sort_values(by='srt')
# Split the combined v2 data into one frame per value of 'Country'.
countries = combined_v2.groupby('Country')
dictionary_countries = {name: frame for name, frame in countries}

# SADD count columns of the v2 data with the sex and age band each encodes.
# The lookup tables used for the Greece and Serbia bar charts are derived
# from this single list.
sadd_column_labels = [
    ('Total number of MEN over age 60', 'Male', '60+'),
    ('Total number of WOMEN over age 60', 'Female', '60+'),
    ('Total number of MEN between age 18 to 59', 'Male', '18 - 59'),
    ('Total number of WOMEN between age 18 to 59', 'Female', '18 - 59'),
    ('Total number of MEN between age 5 to 17', 'Male', '5 - 17'),
    ('Total number of WOMEN between age 5 to 17', 'Female', '5 - 17'),
    ('Total number of BOYS under 5 years old', 'Male', 'Under 5'),
    ('Total number of GIRLS under 5 years old', 'Female', 'Under 5'),
]
sadd_columns = [column for column, _, _ in sadd_column_labels]
gender_code = {column: sex for column, sex, _ in sadd_column_labels}
age_code = {column: band for column, _, band in sadd_column_labels}

# Keep the per-country aggregates for later use.
agg_sets = {}
for country, country_frame in dictionary_countries.items():
    # Long format: one row per record and count column, then total per column.
    temp = country_frame.loc[:, sadd_columns]
    temp_trans = pd.melt(temp)
    temp_agg = (
        temp_trans.groupby('variable')
        .aggregate({'value':'sum'})
        .reset_index()
        .rename(columns={'value':'Total_Beneficiaries'})
    )
    temp_agg['gender_text'] = [gender_code[column] for column in temp_agg['variable']]
    temp_agg['age_group'] = [age_code[column] for column in temp_agg['variable']]
    temp_agg['country'] = country
    # Same youngest-to-oldest ordering key as the Izmir aggregate.
    temp_agg['srt'] = [age_group_sort[band] for band in temp_agg['age_group']]
    temp_agg = temp_agg.sort_values(by='srt')
    agg_sets[country] = temp_agg
# Stack the Izmir, Greece and Serbia aggregates into a single frame and sort
# it for column order in the combined chart (age band first, then country).
izmir_agg['country'] = 'Izmir'
# pd.concat is used instead of chained DataFrame.append calls: it produces the
# same stacked frame (original row labels kept) and, unlike append, is
# available in every pandas release.
all_agg = pd.concat([izmir_agg, agg_sets['Greece'], agg_sets['Serbia']])
all_agg['srt'] = all_agg.age_group.map(lambda x: age_group_sort[x])
all_agg.sort_values(by=['srt','country'],ascending=[True,True],inplace=True)
# Give the region-of-origin question a short, code-friendly column name.
izmir = izmir.rename(columns={'From which region in their country are they displaced?':'from_where'})

# Sum 'Total Number of family' for every (region of origin, district) pair
# and broadcast that sum back onto each row.
origin_district_groups = izmir.groupby(['from_where','District'])
izmir['from_where_district'] = origin_district_groups['Total Number of family'].transform(sum)

# One row per pair, keeping only pairs whose sum is present and non-zero.
izmir_district_agg = izmir[['from_where','District','from_where_district']].drop_duplicates()
has_total = izmir_district_agg['from_where_district'].notnull()
is_nonzero = izmir_district_agg['from_where_district'] != 0
izmir_district_agg = izmir_district_agg[has_total & is_nonzero]

# Coordinate pair for each district name (presumably [latitude, longitude],
# since the values are later handed to folium as marker locations).
# NOTE(review): two keys look mis-encoded (probably Karabaglar and Sirinyer
# written with Turkish letters); they presumably match the spelling stored in
# the spreadsheet - confirm before correcting them.
geo_map = {
    'KarabaÄŸlar': [38.3466, 27.0441],
    'Konak': [38.4145, 27.1441],
    'Torbalı': [38.1928, 27.3851],
    'Bornova': [38.4627, 27.2441],
    'Bayındır': [38.2219, 27.6527],
    'Buca': [38.3337, 27.2911],
    'Åžirinyer': [38.3937986,27.148165],
    'Gaziemir': [38.3231, 27.1382],
    'Karşıyaka': [38.5184, 27.1382],
}

# Aggregate beneficiaries at district level for looping and bubble sizing.
district_groups = izmir_district_agg.groupby('District')
izmir_district_agg['district_total'] = district_groups['from_where_district'].transform(sum)
izmir_district_totals = izmir_district_agg[['District','district_total']].drop_duplicates()
# District names are stripped of surrounding whitespace before the lookup;
# an unknown district raises KeyError.
izmir_district_totals['geo'] = izmir_district_totals['District'].map(lambda district: geo_map[district.strip()])
# reset_index() keeps the old row labels as an 'index' column, so the column
# positions become: index, District, district_total, geo.
izmir_district_totals = izmir_district_totals.reset_index()
# Embed the explanatory text for the map.
ipython.display.IFrame("https://dl.dropboxusercontent.com/s/f0dmoyyklgqn751/RRIMAReport1MapText.html?dl=0",width=1000,height=300)

# Base folium map; each district gets one bubble sized by its beneficiary total.
map_aegean = folium.Map(location=[38.3466, 27.0441],zoom_start=10,height=1000,width=1000)

# Walk the district totals by column NAME. The previous positional
# `.ix[i, 1..3]` lookups depended on the column order left behind by
# reset_index() and on an indexer that newer pandas releases no longer provide.
district_rows = zip(izmir_district_totals['District'],
                    izmir_district_totals['district_total'],
                    izmir_district_totals['geo'])
for district_name, district_total, district_geo in district_rows:
    # Regions of origin recorded for this district. .copy() makes the popup
    # table independent of izmir_district_agg before columns are added to it.
    in_district = izmir_district_agg['District'] == district_name
    popup_table = izmir_district_agg.loc[in_district, ['from_where','from_where_district']].copy()
    popup_table['Beneficiary Location'] = district_name
    popup_table = popup_table.rename(columns={'from_where':'Original Location','from_where_district':'Beneficiaries'})
    popup_table = popup_table[['Beneficiary Location','Original Location','Beneficiaries']]
    popup_table['Beneficiaries'] = popup_table['Beneficiaries'].astype(int)
    popup_table = popup_table.sort_values(by='Beneficiaries', ascending=False)

    # Bar chart of beneficiaries by region of origin, shown inside the popup.
    chart = Bar(popup_table,label=CatAttr(columns=['Original Location'],sort=False),values='Beneficiaries',
                title=district_name,xlabel='Original Location in Syria',ylabel='Beneficiaries',plot_width=700)
    hover = HoverTool(point_policy='follow_mouse')
    hover.tooltips=[('Beneficiaries','@height'),('Current Location',district_name),('Original Location in Syria','@{Original Location}')]
    chart.add_tools(hover)

    # Render the chart to standalone HTML and wrap it in an iframe popup.
    html = file_html(chart, CDN, "my plot")
    #html = popup_table.to_html(index=False)
    iframe = folium.element.IFrame(html=html, width=1200, height=800)
    popup = folium.Popup(iframe, max_width=700)

    # Bubble radius is twice the district's beneficiary total.
    marker = folium.CircleMarker(district_geo,
                                 radius=district_total * 2,
                                 color='#3186cc',
                                 fill_color='#3186cc',
                                 popup=popup)
    marker.add_to(map_aegean)
#map_aegean.save('aegean_map.html')
map_aegean
# Embed the text that introduces the first SADD chart.
ipython.display.IFrame("https://dl.dropboxusercontent.com/s/1x8ws404pzn1pot/RRIMAReport1SADD1.html?dl=0",width=1000,height=300)

# Izmir beneficiaries: one bar group per age band, split by gender.
# sort=False keeps the row order already present in izmir_agg.
izmir_bar_options = dict(
    label=CatAttr(columns=['age_group'], sort=False),
    values='Total_Beneficiaries',
    group='gender_text',
    xlabel='Age Group',
    ylabel='Beneficiaries',
    title='Izmir',
    plot_width=900,
    legend='top_right',
    # NOTE(review): '2500' is passed through unchanged; confirm it is a value
    # the chart's yscale option accepts.
    yscale='2500',
)
izmir_benes_graph = Bar(izmir_agg, **izmir_bar_options)
hover = HoverTool(
    point_policy='follow_mouse',
    tooltips=[("Beneficiaries", "@height"),("Sex", "@gender_text"),("Age Group", "@age_group")],
)
izmir_benes_graph.add_tools(hover)
show(izmir_benes_graph)
# Embed the text that introduces the Greece and Balkans SADD charts.
ipython.display.IFrame("https://dl.dropboxusercontent.com/s/xk25cyf9daiyh7k/RRIMAReport1SADD3.html?dl=0",width=1000,height=200)

# Data is keyed by country name, but the Serbia chart is titled 'Balkans';
# the two lists are parallel.
country_order = ['Greece','Serbia']
title_order = ['Greece','Balkans']
ylabel = 'Beneficiaries'

# Build one grouped bar chart per country and keep it for display below.
greece_serbia_sadd = {}
for country_name, chart_title in zip(country_order, title_order):
    graph = Bar(
        agg_sets[country_name],
        label=CatAttr(columns=['age_group'], sort=False),
        values='Total_Beneficiaries',
        group='gender_text',
        agg='sum',
        title=chart_title,
        legend='top_right',
        plot_width=900,
        xlabel='Age Group',
        ylabel=ylabel,
    )
    hover = HoverTool(
        point_policy='follow_mouse',
        tooltips=[("Beneficiaries", "@height"),("Sex", "@gender_text"),("Age Group", "@age_group")],
    )
    graph.add_tools(hover)
    greece_serbia_sadd[country_name] = graph

show(greece_serbia_sadd['Greece'])
ipython.display.IFrame("https://dl.dropboxusercontent.com/s/vh21lmuk32n90t4/RRIMAReportSADD4.html?dl=0",width=1000,height=200)
show(greece_serbia_sadd['Serbia'])
ipython.display.IFrame("https://dl.dropboxusercontent.com/s/6hfce7lan3r0pyt/RRIMAReport1SADD5.html?dl=0",width=1000,height=200)
# Display label for the combined bar graph: Serbia rows are shown as
# 'Balkans'; every other country keeps its own name.
is_serbia = all_agg['country'] == 'Serbia'
all_agg['country_display'] = np.where(is_serbia, 'Balkans', all_agg['country'])
def country_srt(x):
    """Return the sort rank for a country display label.

    'Izmir' ranks 1, 'Greece' 2 and 'Balkans' 3. Any other value yields
    None.
    """
    ranked_labels = (('Izmir', 1), ('Greece', 2), ('Balkans', 3))
    for label, rank in ranked_labels:
        if x == label:
            return rank
    return None
# Order rows by age band, then by location rank (Izmir, Greece, Balkans).
all_agg['srt_group'] = all_agg['country_display'].map(country_srt)
all_agg = all_agg.sort_values(by=['srt','srt_group'], ascending=True)

# Combined chart: one bar per (location, gender) pair inside each age band.
all_agg_bar_options = dict(
    label=CatAttr(columns=['age_group'], sort=False),
    values='Total_Beneficiaries',
    group=['country_display','gender_text'],
    xlabel='Age Group',
    ylabel='Beneficiaries',
    legend='top_right',
    plot_width=900,
    color=['Red','Salmon','LightBlue','MediumBlue','Lime','LimeGreen'],
    title='Izmir, Greece, and Balkans',
)
all_agg_graph = Bar(all_agg, **all_agg_bar_options)
hover = HoverTool(
    point_policy='follow_mouse',
    tooltips=[("Beneficiaries", "@height"),("Sex", "@gender_text"),("Age Group", "@age_group"),("Country","@country_display")],
)
all_agg_graph.add_tools(hover)
show(all_agg_graph)
# Standard colour names used to give every nationality a consistent colour.
css_colors = ['Black', 'Aqua', 'Blue', 'Fuchsia', 'Gray', 'Green', 'Lime', 'Maroon', 'Navy', 'Olive', 'Orange', 'Purple', 'Red', 'Silver', 'Teal', 'White', 'Yellow']

# Collect every nationality that appears in either dataset.
izmir_nationalities = izmir[['Nationality']].drop_duplicates()
v2_nationalities = combined_v2[['Head of Household Country of Birth']].drop_duplicates().rename(columns={'Head of Household Country of Birth':'Nationality'})
all_nationalities = pd.concat([izmir_nationalities, v2_nationalities])

# Label missing nationalities as 'Unknown'. The result has to be assigned
# back: fillna returns a new Series, so the previous bare call changed nothing
# and the missing entry could never match the 'Unknown' label that the Izmir
# rows receive before the merge. Filling BEFORE de-duplicating also prevents
# two 'Unknown' rows when the label is already present in the data.
all_nationalities['Nationality'] = all_nationalities['Nationality'].fillna('Unknown')

# De-duplicate, then number the rows 0..n-1 and expose that number as an
# 'index' column (used later as a merge payload and sort key).
all_nationalities = all_nationalities.drop_duplicates().reset_index(drop=True).reset_index()
def map_color(x, palette=None):
    """Return the colour for integer position *x*, cycling through a palette.

    Args:
        x: Integer position (for example a row index).
        palette: Optional sequence of colour names to cycle through. When
            omitted, the module-level ``css_colors`` list is used, which is
            the behaviour of the original one-argument form.

    Returns:
        ``palette[x % len(palette)]``, so positions beyond the end of the
        palette wrap around to the start.
    """
    if palette is None:
        palette = css_colors
    return palette[x % len(palette)]
# Give every nationality a colour based on its row position.
all_nationalities['color'] = all_nationalities.index.map(map_color)

#aggregate izmir beneficiaries by reported nationality
# Records without a nationality are grouped under 'Unknown'.
izmir['Nationality'] = izmir['Nationality'].fillna('Unknown')
izmir['total_nationalities'] = izmir.groupby(['Nationality'])['Total Number of family'].transform(sum)
izmir_nationality_agg = izmir[['Nationality','total_nationalities']].drop_duplicates()

# Attach the shared row number ('index') and colour of each nationality.
izmir_nationality_agg = izmir_nationality_agg.merge(all_nationalities,how='left',on='Nationality')
izmir_nationality_agg.sort_values(by='index',ascending=True,inplace=True)

# Keep the five largest nationalities; everything ranked lower becomes 'Other'.
izmir_nationality_agg['ranked'] = izmir_nationality_agg.total_nationalities.rank(ascending=False)
izmir_nationality_agg['NationalityOther'] = np.where(izmir_nationality_agg.ranked > 5, 'Other', izmir_nationality_agg.Nationality)

# For every display label, list the nationalities it covers. A label that
# covers a single nationality gets that nationality; 'Other' gets a
# comma-separated list. The previous row-by-row np.where loop passed an array
# with one entry per member of the label, which cannot be broadcast against
# the full column once a label covers more than one nationality.
names_by_label = izmir_nationality_agg.groupby('NationalityOther')['Nationality'].apply(
    lambda names: ', '.join(names.astype(str)))
izmir_nationality_agg['NationalityList'] = izmir_nationality_agg['NationalityOther'].map(names_by_label)
# Embed the text that introduces the country-of-origin charts.
ipython.display.IFrame("https://dl.dropboxusercontent.com/s/h9n2kay3ncdfchh/RRIMAReport1CO1.html?dl=0",width=1000,height=300)

# Donut chart of Izmir beneficiaries by reported nationality (top five plus
# 'Other'). Note that this rebinds izmir_nationalities from a data frame to
# the chart object.
izmir_donut_options = dict(
    label=CatAttr(columns=['NationalityOther'], sort=False),
    values='total_nationalities',
    plot_width=800,
    plot_height=800,
    title='Izmir',
)
izmir_nationalities = Donut(izmir_nationality_agg, **izmir_donut_options)
hover = HoverTool(
    point_policy='follow_mouse',
    tooltips=[('Beneficiaries','@values'),('Country of Origin','@NationalityOther')],
)
izmir_nationalities.add_tools(hover)
show(izmir_nationalities)
# Embed the text that introduces the Greece and Balkans nationality charts.
ipython.display.IFrame("https://dl.dropboxusercontent.com/s/blpbs2thwwjqsog/RRIMAReport1CO3.html?dl=0",width=1000,height=200)

# One nationality donut per country. Each country's entry in
# dictionary_countries is replaced by its reduced nationality table.
greece_serbia_nationality = {}
for country_name, chart_title in zip(country_order, title_order):
    nationality_frame = dictionary_countries[country_name].rename(
        columns={'Head of Household Country of Birth':'Nationality'})

    # Sum the group sizes per nationality, then keep one row per nationality.
    nationality_groups = nationality_frame.groupby('Nationality')
    nationality_frame['NationalityTotal'] = nationality_groups['Total number of members in group'].transform(sum)
    nationality_frame = nationality_frame.loc[:, ['Nationality','NationalityTotal']].drop_duplicates()

    # Everything outside the five largest nationalities is shown as 'Other'.
    nationality_frame['ranked'] = nationality_frame['NationalityTotal'].rank(ascending=False)
    nationality_frame['NationalityOther'] = np.where(
        nationality_frame['ranked'] > 5, 'Other', nationality_frame['Nationality'])
    dictionary_countries[country_name] = nationality_frame

    graph = Donut(
        nationality_frame,
        label=CatAttr(columns=['NationalityOther'], sort=False),
        values='NationalityTotal',
        agg='sum',
        plot_width=800,
        plot_height=800,
        title=chart_title,
    )
    hover = HoverTool(
        point_policy='follow_mouse',
        tooltips=[("Beneficiaries", "@values"),("Country of Origin", "@NationalityOther")],
    )
    graph.add_tools(hover)
    greece_serbia_nationality[country_name] = graph

show(greece_serbia_nationality['Greece'])
ipython.display.IFrame("https://dl.dropboxusercontent.com/s/uabsqg7vwd1dy64/RRIMAReport1CO4.html?dl=0",width=1000,height=200)
show(greece_serbia_nationality['Serbia'])
ipython.display.IFrame("https://dl.dropboxusercontent.com/s/u7xrvsdvuyk1fui/RRIMAReport1CO5.html?dl=0",width=1000,height=200)
# Embed the closing sections of the report: next steps, then footnotes.
ipython.display.IFrame("https://dl.dropboxusercontent.com/s/c5u30iozid2jqi7/RRIMAReport1NextSteps.html?dl=0",width=1000,height=500)
ipython.display.IFrame("https://dl.dropboxusercontent.com/s/movsluo0215nlj5/RRIMAREPORT1Footnotes.html?dl=0",width=1000,height=300)

# Script that hides code cells by default when the notebook is exported as
# HTML. It only toggles when the page has no "body.notebook_app" element.
hide_code_script = '<script>jQuery(function() {if (jQuery("body.notebook_app").length == 0) { jQuery(".input_area").toggle(); jQuery(".prompt").toggle();}});</script>'
di.display_html(hide_code_script, raw=True)

# Button that toggles the visibility of code blocks, for use with the HTML
# export version.
toggle_button_html = '''<button onclick="jQuery('.input_area').toggle(); jQuery('.prompt').toggle();">Toggle code</button>'''
di.display_html(toggle_button_html, raw=True)